
# This script reruns the policy-sampling code from the Rmd files.
# At the bottom, it repeats the sampling many times to estimate
# the design weights by Monte Carlo simulation.

# code that samples policies

# disproportionate sampling of questions
# setwd("c:/users/thomas/dropbox/methods/cueeffectcomparisons/roper coding")
# rmarkdown::render("sampling.Rmd", quiet = TRUE)

# load packages
requireNamespace("rio", quietly = TRUE)

# load dataset of coded issues
# Reads the sheet "Roper 2016 Full" from the coded workbook.
issues <- rio::import("sampling-policies/Roper 2016 Full_v7.xlsx", which = "Roper 2016 Full")
# Keep only rows whose Partisanship code is not "N".
# NOTE(review): with `!=` indexing, a row whose Partisanship is NA comes through
# as an all-NA row rather than being dropped -- confirm the column has no
# missing values.
issues <- issues[issues$Partisanship != "N", , drop = FALSE]

# Summary table with one row per category:
#   - number of issues that have at least one row in the category
#   - number of rows (policies, duplicates included)
#   - number of rows after dropping repeated Issue/Policy pairs
knitr::kable(cbind(names(table(issues$Category)),
                   apply(with(issues, table(Category, Issue)), 1L, function(x) sum(x != 0)),
                   table(issues$Category),
                   table(issues[!duplicated(issues[c("Issue", "Policy")]), "Category"])
),
row.names = FALSE,
col.names = c("Category", "Issues", "Policies (w/ Duplicates)", "Policies (w/o Duplicates)"), 
format = "markdown")

# divide by category
# `issues_subset` keeps the first occurrence of every Issue/Policy pair.
issues_subset <- issues[!duplicated(issues[c("Issue", "Policy")]), , drop = FALSE]
# One data frame per category label. The same list is rebuilt further down,
# after `issues` has been overwritten with the de-duplicated rows.
issues_list <- list(
  economic = issues_subset[issues_subset$Category == "economic",],
  'foreign policy' = issues_subset[issues_subset$Category == "foreign policy",],
  social = issues_subset[issues_subset$Category == "social",]
)
# Cross-tabulate category against the per-issue policy count, i.e. how many
# issues in each category have k de-duplicated policies. `[,-1L]` drops the
# first count column (presumably the zero count produced by issue/category
# pairs that never occur -- confirm).
knitr::kable(with(data.frame(table(issues_subset$Issue, issues_subset$Category)), table(Var2, Freq))[,-1L], format = "markdown")

# Per-issue policy counts before (n1) and after (n2) dropping duplicated
# Issue/Policy pairs, listed in decreasing order of n2 and then issue name.
issue_name_vec <- names(table(issues_subset$Issue))
tmp <- cbind.data.frame(
  issue = issue_name_vec,
  n1 = as.data.frame(table(issues[["Issue"]])[issue_name_vec])[[2L]],
  n2 = as.data.frame(table(issues_subset[["Issue"]])[issue_name_vec])[[2L]]
)
knitr::kable(tmp[order(tmp$n2, tmp$issue, decreasing = TRUE),],
             row.names = FALSE,
             col.names = c("Issue", "Policies (w/ Duplicates)", "Policies (w/o Duplicates)"),
             format = "markdown")
# `tmp` was only needed for the table above.
rm(tmp)

# drop duplicated policies
# From here on `issues` holds the de-duplicated rows; this is the data frame
# the sampling code below draws from.
issues <- issues_subset
issues_list <- list(
  economic = issues[issues$Category == "economic",],
  'foreign policy' = issues[issues$Category == "foreign policy",],
  social = issues[issues$Category == "social",]
)

# Overview of the sampling frame and the per-category sampling targets.
knitr::kable(data.frame(
  # category names
  Category = names(issues_list),
  # number of issues per category
  Issues = unlist(lapply(issues_list, function(x) length(unique(x$Issue)))),
  # number of policies per category
  Policies = unlist(lapply(issues_list, nrow)),
  # sampling target per category: the same values are passed as `threshold`
  # to do_sampling() below, where they are compared against the number of
  # sampled policies
  Thresholds = c(16, 8, 24),
  row.names = 1:3
), row.names = FALSE, 
format = "markdown")

# Sample policies from one category, one issue at a time.
#
# Issues are drawn without replacement from the rows of `x` that belong to
# `category`. Each drawn issue contributes
#   - its only policy,             if it has exactly one policy;
#   - one randomly chosen policy,  if it has exactly two policies;
#   - three randomly chosen policies (without replacement) otherwise.
# Drawing stops as soon as at least `threshold` policies have been collected
# or the category runs out of issues. Because an issue with three or more
# policies adds three rows at once, the result can exceed `threshold` by up
# to two rows.
#
# Arguments:
#   x         data frame with columns QuestionID, QuestionTxt, Category,
#             Issue, Policy and Partisanship
#   category  value of `x$Category` to sample from
#   threshold number of policies at which no further issue is drawn
#
# Returns a data frame with one row per sampled policy (the six columns
# listed above). If nothing is sampled, the result is an empty data frame.
do_sampling <-
  function(
    x, # data frame of policies
    category, # category to sample from
    threshold # stop drawing issues once this many policies are sampled
  ) {
    # subset x to the requested category
    this_category <- x[x$Category == category, c("QuestionID", "QuestionTxt", "Category", "Issue", "Policy", "Partisanship"), drop = FALSE]
    
    # list collecting the sampled policies (one single-row data frame each)
    sampled <- list()
    
    # number of policies sampled so far
    n_sampled <- 0L
    
    # while `n_sampled` < threshold, sample one issue w/o replacement, then sample policies from it
    while ((nrow(this_category) >= 1L) && (n_sampled < threshold)) {
      
      # sample an issue from this category.
      # Index through sample.int() instead of calling sample(issue_pool, 1L):
      # sample() treats a length-one numeric vector `n` as `1:n`, so a single
      # remaining numeric issue code would be replaced by a random integer.
      # For character or factor issues both forms use the same random draw.
      issue_pool <- unique(this_category$Issue)
      sampled_issue_name <- issue_pool[sample.int(length(issue_pool), 1L)]
      sampled_issue <- this_category[this_category$Issue %in% sampled_issue_name, , drop = FALSE]
      
      ## drop the issue from category (thus: sampling w/o replacement)
      this_category <- this_category[!this_category$Issue %in% sampled_issue_name, , drop = FALSE]
      
      # sample policies from the issue based upon number of policies in issue
      if (nrow(sampled_issue) == 1L) {
        # single policy: take it as is (no random draw needed)
        sampled[[length(sampled) + 1L]] <- sampled_issue
        n_sampled <- n_sampled + 1L
        
      } else if (nrow(sampled_issue) == 2L) {
        # two policies: keep one of them at random
        sampled[[length(sampled) + 1L]] <- sampled_issue[sample(seq_len(nrow(sampled_issue)), 1L), , drop = FALSE]
        n_sampled <- n_sampled + 1L
        
      } else {
        # three or more policies: keep three of them, drawn w/o replacement
        sample_indices <- sample(seq_len(nrow(sampled_issue)), 3L, FALSE)
        for (i in seq_along(sample_indices)) {
          sampled[[length(sampled) + 1L]] <- sampled_issue[sample_indices[i], , drop = FALSE]
          n_sampled <- n_sampled + 1L
        }
      }
    }
    
    # stack the sampled rows into one data frame
    do.call("rbind.data.frame", sampled)
  }

# do sampling
# Fixed seed so that the draw below can be reproduced exactly. The three
# do_sampling() calls consume random numbers in the order written, so
# reordering them (or adding any random draw before them) changes the sample.
set.seed(20180720)
sampled_policies <- list(
  economic = do_sampling(issues, "economic", threshold = 16L),
  foreign = do_sampling(issues, "foreign policy", threshold = 8L),
  social = do_sampling(issues, "social", threshold = 24L)
)
# Stack the three per-category samples, sort by category, issue and policy,
# and replace the row names with a plain 1..n sequence.
issue_sample <- do.call("rbind.data.frame", sampled_policies)
issue_sample <- issue_sample[order(issue_sample$Category, issue_sample$Issue, issue_sample$Policy), ]
rownames(issue_sample) <- seq_len(nrow(issue_sample))


# Draw one complete policy sample (all three categories) from the global
# `issues` data frame, using the same per-category thresholds as the seeded
# draw above. Returns the pooled rows sorted by category, issue and policy,
# with row names reset to 1..n.
sample_policies_fn <- function() {
  # list names -> category labels, paired position-wise with the thresholds
  category_labels <- c(
    economic = "economic",
    foreign = "foreign policy",
    social = "social"
  )
  category_thresholds <- c(16L, 8L, 24L)

  # Map() works through the categories in the order given, so the random
  # draws happen economic -> foreign policy -> social
  per_category <- Map(
    function(label, k) do_sampling(issues, label, threshold = k),
    category_labels,
    category_thresholds
  )

  pooled <- do.call("rbind.data.frame", per_category)
  row_order <- order(pooled$Category, pooled$Issue, pooled$Policy)
  pooled <- pooled[row_order, ]
  rownames(pooled) <- seq_len(nrow(pooled))
  pooled
}


# the code below estimates the design weights

library(kableExtra)
library(tidyverse)

# Distinct policy / stem / category / issue combinations found in the
# respondent data.
#   - `issue_id` is prefixed with "Issue " before the join
#   - left_join() is called without `by`, so it joins on every column name the
#     two tables share (presumably `issue_id` -- confirm against the files)
#   - glimpse() prints the result and passes it through
# NOTE(review): `mod_df` is not referenced again in the visible part of this
# script.
mod_df <- read_rds("output/respondent-data.rds") %>%
  mutate(issue_id = str_c("Issue ", issue_id)) %>%
  left_join(read_csv("output/issue-meta-w-awareness.csv")) %>%
  select(Policy = policy, Stem = stem, Category = category, Issue = issue) %>%
  distinct() %>%
  glimpse()

# Monte Carlo replicates of the sampling procedure: draw 10000 complete
# samples, tag every row with the index of the replicate it came from
# (`data_set_index`), and stack everything into one long data frame.
wts_df <- seq_len(10000) %>%
  map(function(replicate_id) {
    mutate(sample_policies_fn(), data_set_index = replicate_id)
  }) %>%
  bind_rows() %>%
  glimpse()

# Total number of simulated samples. This must be taken from the full
# simulation frame: inside a grouped summarize(), `max(data_set_index)` would
# only see the rows of one policy and return the last replicate in which that
# policy happened to be drawn, which overstates its inclusion probability.
n_simulations <- max(wts_df$data_set_index)

# Issue-level inclusion probabilities and design weights:
#   1. policy level: share of simulated samples that contain the policy
#   2. issue level: mean of the policy-level shares within the issue
#   3. design weight: inverse of the issue-level inclusion probability
# The result is written to "output/weights.rds" and printed with glimpse().
wts2_df <- wts_df %>%
  mutate(Issue = str_to_title(Issue),
         Category = str_to_title(Category)) %>%
  group_by(Policy, Issue, Category) %>%
  # a policy appears at most once per simulated sample, so n() counts the
  # samples that include it
  summarize(pr_inclusion = n() / n_simulations) %>%
  ungroup() %>%
  # the factor levels of Issue set here determine the row order of the
  # issue-level summary below
  mutate(Issue = reorder(Issue, pr_inclusion),
         Policy = reorder(Policy, pr_inclusion)) %>%
  arrange(Issue, Policy, pr_inclusion) %>%
  group_by(Issue, Category) %>%
  summarize(pr_inclusion = mean(pr_inclusion)) %>%
  ungroup() %>%
  rename(issue = Issue) %>%
  mutate(design_weight = 1 / pr_inclusion,
         issue = reorder(issue, pr_inclusion)) %>%
  write_rds("output/weights.rds") %>%
  glimpse()

